# reformat and merge prevalenc data
df_uk <- df_uk_covid %>% filter(time <=22) %>%
dplyr::rename(pers_o = open,
pers_c = sci,
pers_e = extra,
pers_a = agree,
pers_n = neuro) %>%
left_join(df_uk_popdens, by='ut_area') %>%
select(-ut_name.y, -ut_name.x) %>%
drop_na()
df_uk %>% head()
NA
nuts_london_inner <- c('UKI31','UKI32','UKI33','UKI34','UKI41',
'UKI42','UKI43','UKI44','UKI45')
nuts_london_outer <- c('UKI51','UKI52','UKI53','UKI54','UKI61',
'UKI62','UKI63','UKI71','UKI72','UKI73',
'UKI74','UKI75')
ut_london_inner <- c('E09000007','E09000001','E09000033','E09000013',
'E09000020','E09000032','E09000025','E09000012',
'E09000030','E09000014','E09000019','E09000023',
'E09000028','E09000022')
ut_london_outer <- c('E09000011','E09000004','E09000016','E09000002',
'E09000031','E09000026','E09000010','E09000006',
'E09000008','E09000029','E09000021','E09000024',
'E09000003','E09000005','E09000009','E09000017',
'E09000015','E09000018','E09000027')
df_uk_socdist = df_uk_socdist %>%
mutate(london = ifelse(nuts3 %in% nuts_london_inner, 'london_inner',
ifelse(nuts3 %in% nuts_london_outer, 'london_outer',
'country')))
df_uk = df_uk %>%
mutate(london = ifelse(ut_area %in% ut_london_inner, 'london_inner',
ifelse(ut_area %in% ut_london_outer, 'london_outer',
'country')))
df_uk
NA
df_uk %>% ggplot(aes(x=time, y=rate_day)) +
geom_point(aes(col=ut_area, size=popdens)) +
geom_smooth(method="loess", se=T) +
theme(legend.position="none") +
ggtitle("Overall prevalence over time")
pers <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')
for (i in pers){
gg <- df_uk %>% mutate(prev_tail = cut(.[[i]],
breaks = c(-Inf, quantile(.[[i]], 0.2), quantile(.[[i]], 0.8), Inf),
labels = c('lower tail', 'center', 'upper tail'))) %>%
filter(prev_tail != 'center') %>%
ggplot(aes(x=time, y=rate_day)) +
geom_point(aes(col=ut_area, size=popdens)) +
geom_smooth(method="loess", se=T) +
facet_wrap(~prev_tail) +
theme(legend.position="none") +
ggtitle(i)
print(gg)
}
df_uk %>% group_by(ut_area) %>%
summarize_if(is.numeric, mean, na.rm=T) %>%
select(-ut_area, -time) %>%
cor(use = 'pairwise.complete') %>% round(3)
pers_o pers_e pers_a pers_n pers_c frequ poptotal rate_day popdens
pers_o 1.000 0.734 -0.605 -0.144 -0.638 0.008 -0.097 0.646 0.801
pers_e 0.734 1.000 -0.413 -0.500 -0.294 0.065 -0.074 0.556 0.621
pers_a -0.605 -0.413 1.000 -0.195 0.641 0.131 0.199 -0.585 -0.741
pers_n -0.144 -0.500 -0.195 1.000 -0.351 -0.243 -0.124 -0.108 0.040
pers_c -0.638 -0.294 0.641 -0.351 1.000 0.220 0.225 -0.489 -0.729
frequ 0.008 0.065 0.131 -0.243 0.220 1.000 0.949 -0.082 -0.195
poptotal -0.097 -0.074 0.199 -0.124 0.225 0.949 1.000 -0.128 -0.252
rate_day 0.646 0.556 -0.585 -0.108 -0.489 -0.082 -0.128 1.000 0.773
popdens 0.801 0.621 -0.741 0.040 -0.729 -0.195 -0.252 0.773 1.000
df_uk
NA
# function calculates all relevant models
run_models <- function(y, lvl1_x, lvl2_x, lvl2_id, data, ctrls=F){
# subset data
data = data %>%
dplyr::select(all_of(y), all_of(lvl1_x), all_of(lvl2_x), all_of(lvl2_id),
popdens, all_of(y))
data = data %>%
dplyr::rename(y = all_of(y),
lvl1_x = all_of(lvl1_x),
lvl2_x = all_of(lvl2_x),
lvl2_id = all_of(lvl2_id)
)
# configure optimization procedure
ctrl_config <- lmeControl(opt = 'optim', maxIter = 100, msMaxIter = 100)
# baseline
baseline <- lme(fixed = y ~ 1, random = ~ 1 | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# random intercept fixed slope
random_intercept <- lme(fixed = y ~ lvl1_x + lvl2_x,
random = ~ 1 | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# random intercept random slope
random_slope <- lme(fixed = y ~ lvl1_x + lvl2_x,
random = ~ lvl1_x | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# cross level interaction
interaction <- lme(fixed = y ~ lvl1_x * lvl2_x,
random = ~ lvl1_x | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# create list with results
results <- list('baseline' = baseline,
"random_intercept" = random_intercept,
"random_slope" = random_slope,
"interaction" = interaction)
if (ctrls == 'dem' | ctrls == 'prev'){
# random intercept random slope
random_slope_ctrl_dem <- lme(fixed = y ~ lvl1_x + lvl2_x + popdens,
random = ~ lvl1_x | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# cross level interaction
interaction_ctrl_main_dem <- lme(fixed = y ~ lvl1_x * lvl2_x + popdens,
random = ~ lvl1_x | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# cross level interaction
interaction_ctrl_int_dem <- lme(fixed = y ~ lvl1_x * lvl2_x + lvl1_x * popdens,
random = ~ lvl1_x | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# create list with results
results <- list('baseline' = baseline,
"random_intercept" = random_intercept,
"random_slope" = random_slope,
"interaction" = interaction,
"random_slope_ctrl_dem" = random_slope_ctrl_dem,
"interaction_ctrl_main_dem" = interaction_ctrl_main_dem,
"interaction_ctrl_int_dem" = interaction_ctrl_int_dem)
}
if (ctrls == 'prev'){
# random intercept random slope
random_slope_ctrl_prev <- lme(fixed = y ~ lvl1_x + lvl2_x + popdens + rate_day,
random = ~ lvl1_x + rate_day | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# cross level interaction
interaction_ctrl_main_prev <- lme(fixed = y ~ lvl1_x * lvl2_x + popdens + rate_day,
random = ~ lvl1_x | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# cross level interaction
interaction_ctrl_int_prev<- lme(fixed = y ~ lvl1_x * lvl2_x + lvl1_x * popdens + rate_day,
random = ~ lvl1_x + rate_day | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# create list with results
results <- list('baseline' = baseline,
"random_intercept" = random_intercept,
"random_slope" = random_slope,
"interaction" = interaction,
"random_slope_ctrl_dem" = random_slope_ctrl_dem,
"interaction_ctrl_main_dem" = interaction_ctrl_main_dem,
"interaction_ctrl_int_dem" = interaction_ctrl_int_dem,
"random_slope_ctrl_prev" = random_slope_ctrl_prev,
"interaction_ctrl_main_prev" = interaction_ctrl_main_prev,
"interaction_ctrl_int_prev" = interaction_ctrl_int_prev)
}
if(ctrls == 'exp'){
# random intercept random slope
random_slope_exp <- lme(fixed = y ~ (lvl1_x + I(lvl1_x^2)) + lvl2_x,
random = ~ (lvl1_x + I(lvl1_x^2)) | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# cross level interaction
interaction_exp <- lme(fixed = y ~ (lvl1_x + I(lvl1_x^2)) * lvl2_x,
random = ~ (lvl1_x + I(lvl1_x^2)) | lvl2_id,
data = data,
correlation = corAR1(),
control = ctrl_config,
method = 'ML')
# create list with results
results <- list('baseline' = baseline,
"random_intercept" = random_intercept,
"random_slope" = random_slope,
"interaction" = interaction,
"random_slope_exp" = random_slope_exp,
"interaction_exp" = interaction_exp)
}
return(results)
}
# extracts table with coefficients and tests statistics
extract_results <- function(models) {
models_summary <- models %>%
map(summary) %>%
map("tTable") %>%
map(as.data.frame) %>%
map(round, 10)
# %>% map(~ .[str_detect(rownames(.), 'Inter|lvl'),])
return(models_summary)
}
# calculates comparison of all models in model list
compare_models <- function(models) {
mdl_names <- models %>% names()
str = ''
for (i in mdl_names){
mdl_str <- paste('models$', i, sep = '')
if(i == 'baseline'){
str <- mdl_str
}else{
str <- paste(str, mdl_str, sep=', ')
}
}
anova_str <- paste0('anova(', str, ')')
mdl_comp <- eval(parse(text=anova_str))
rownames(mdl_comp) = mdl_names
return(mdl_comp)
}
lvl2_scaled_ut <- df_uk %>%
dplyr::select(-time, -frequ, -rate_day, -london) %>%
distinct() %>%
mutate_at(vars(-ut_area), scale)
lvl1_scaled_ut <- df_uk %>% select(ut_area, time, rate_day) %>%
mutate_at(vars(-ut_area, -time), scale)
df_uk_scaled <- plyr::join(lvl1_scaled_ut, lvl2_scaled_ut, by = 'ut_area')
head(df_uk_scaled)
df_uk_socdist
lvl2_scaled_nuts <- df_uk_socdist %>%
dplyr::select(-time, -date, -frequ, -london,
-socdist_tiles, -socdist_single_tile) %>%
distinct() %>%
mutate_at(vars(-nuts3), scale)
lvl1_scaled_nuts <- df_uk_socdist %>% select(nuts3, time, socdist_single_tile) %>%
mutate_at(vars(-nuts3, -time), scale)
df_uk_socdist_scaled <- plyr::join(lvl1_scaled_nuts, lvl2_scaled_nuts, by = 'nuts3')
head(df_uk_socdist_scaled)
NA
models_o_covid <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_o',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'dem')
extract_results(models_o_covid)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_ctrl_dem
$interaction_ctrl_main_dem
$interaction_ctrl_int_dem
compare_models(models_o_covid)
NA
models_c_covid <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_c',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'dem')
extract_results(models_c_covid)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_ctrl_dem
$interaction_ctrl_main_dem
$interaction_ctrl_int_dem
compare_models(models_c_covid)
NA
NA
models_e_covid <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_e',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'dem')
extract_results(models_e_covid)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_ctrl_dem
$interaction_ctrl_main_dem
$interaction_ctrl_int_dem
compare_models(models_e_covid)
NA
NA
models_a_covid <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_a',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'dem')
extract_results(models_a_covid)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_ctrl_dem
$interaction_ctrl_main_dem
$interaction_ctrl_int_dem
compare_models(models_a_covid)
NA
NA
models_n_covid <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_n',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'dem')
extract_results(models_n_covid)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_ctrl_dem
$interaction_ctrl_main_dem
$interaction_ctrl_int_dem
compare_models(models_n_covid)
NA
NA
models_o_covid_exp <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_o',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'exp')
extract_results(models_o_covid_exp)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_exp
$interaction_exp
compare_models(models_o_covid_exp)
NA
models_c_covid_exp <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_c',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'exp')
extract_results(models_c_covid_exp)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_exp
$interaction_exp
compare_models(models_c_covid_exp)
NA
models_e_covid_exp <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_e',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'exp')
extract_results(models_e_covid_exp)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_exp
$interaction_exp
compare_models(models_e_covid_exp)
NA
models_a_covid_exp <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_a',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'exp')
extract_results(models_a_covid_exp)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_exp
$interaction_exp
compare_models(models_a_covid_exp)
NA
models_n_covid_exp <-run_models(y = 'rate_day',
lvl1_x = 'time',
lvl2_x = 'pers_n',
lvl2_id = 'ut_area',
data = df_uk_scaled,
ctrls = 'exp')
extract_results(models_n_covid_exp)
$baseline
$random_intercept
$random_slope
$interaction
$random_slope_exp
$interaction_exp
compare_models(models_n_covid_exp)
NA
# prevalence
models_prev <- list(models_o_covid,
models_c_covid,
models_e_covid,
models_a_covid,
models_n_covid)
sum_tab_prev <- summary_table(models_prev, dv_name = 'prev')
write.table(sum_tab_prev, quote=F)
Value Std.Error DF t-value p-value
prev~o*time_crtl_popdens 0.0446 0.0025 3085 17.7883 0
prev~c*time_crtl_popdens -0.0385 0.0025 3085 -15.1323 0
prev~e*time_crtl_popdens 0.037 0.0026 3085 14.4632 0
prev~a*time_crtl_popdens -0.0424 0.0025 3085 -16.806 0
prev~n*time_crtl_popdens -0.0031 0.0026 3085 -1.1772 0.2392
prev~o*time_crtl_popdens*time -0.0018 0.004 3084 -0.4332 0.6649
prev~c*time_crtl_popdens*time 0.0057 0.0035 3084 1.6178 0.1058
prev~e*time_crtl_popdens*time 0.003 0.0031 3084 0.9744 0.33
prev~a*time_crtl_popdens*time -0.0012 0.0036 3084 -0.3446 0.7304
prev~n*time_crtl_popdens*time -0.0054 0.0024 3084 -2.2262 0.0261
# social distancing
models_socdist <- list(models_o_socdist,
models_c_socdist,
models_e_socdist,
models_a_socdist,
models_n_socdist)
sum_tab_socdist <- summary_table(models_socdist, dv_name = 'socdist')
write.table(sum_tab_socdist, quote=F)
Value Std.Error DF t-value p-value
socdist~o*time_crtl_popdens -0.003 0.0016 2858 -1.8795 0.0603
socdist~c*time_crtl_popdens 0.0025 0.0016 2858 1.566 0.1175
socdist~e*time_crtl_popdens -0.0034 0.0016 2858 -2.125 0.0337
socdist~a*time_crtl_popdens 0.0026 0.0016 2858 1.6029 0.1091
socdist~n*time_crtl_popdens 7e-04 0.0016 2858 0.4622 0.644
socdist~o*time_crtl_popdens*time 1e-04 0.0026 2857 0.0494 0.9606
socdist~c*time_crtl_popdens*time -7e-04 0.0024 2857 -0.3057 0.7599
socdist~e*time_crtl_popdens*time -0.0015 0.0021 2857 -0.7327 0.4638
socdist~a*time_crtl_popdens*time -6e-04 0.0024 2857 -0.2436 0.8076
socdist~n*time_crtl_popdens*time 8e-04 0.0016 2857 0.5063 0.6127
df_uk_slope_prev %>% ggplot(aes(slope_prev)) + geom_histogram(bins = 100)
df_uk_slope_socdist %>% ggplot(aes(slope_socdist)) + geom_histogram(bins = 100)
ctrls <- cforest_unbiased(ntree=500, mtry=5)
crf_o_fit_prev <- cforest(slope_prev ~ pers_o + women + academics +
hospital_beds + gdp + manufact +
airport + age + popdens,
df_ger_slope_prev[-1],
controls = ctrls)
crf_o_varimp_prev <- varimp(crf_o_fit_prev, nperm = 1)
crf_o_varimp_cond_prev <- varimp(crf_o_fit_prev, conditional = T, nperm = 1)
crf_o_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
crf_o_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
ctrls <- cforest_unbiased(ntree=500, mtry=5)
crf_c_fit_prev <- cforest(slope_prev ~ pers_c + women + academics +
cdu + afd + hospital_beds + gdp + manufact +
airport + age + popdens,
df_ger_slope_prev[-1],
controls = ctrls)
crf_c_varimp_prev <- varimp(crf_c_fit_prev, nperm = 1)
crf_c_varimp_cond_prev <- varimp(crf_c_fit_prev, conditional = T, nperm = 1)
crf_c_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
crf_c_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
ctrls <- cforest_unbiased(ntree=500, mtry=5)
crf_e_fit_prev <- cforest(slope_prev ~ pers_e + women + academics +
cdu + afd + hospital_beds + gdp + manufact +
airport + age + popdens,
df_ger_slope_prev[-1],
controls = ctrls)
crf_e_varimp_prev <- varimp(crf_e_fit_prev, nperm = 1)
crf_e_varimp_cond_prev <- varimp(crf_e_fit_prev, conditional = T, nperm = 1)
crf_e_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
crf_e_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
ctrls <- cforest_unbiased(ntree=500, mtry=5)
crf_a_fit_prev <- cforest(slope_prev ~ pers_a + women + academics +
cdu + afd + hospital_beds + gdp + manufact +
airport + age + popdens,
df_ger_slope_prev[-1],
controls = ctrls)
crf_a_varimp_prev <- varimp(crf_a_fit_prev, nperm = 1)
crf_a_varimp_cond_prev <- varimp(crf_a_fit_prev, conditional = T, nperm = 1)
crf_a_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
crf_a_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
ctrls <- cforest_unbiased(ntree=500, mtry=5)
crf_n_fit_prev <- cforest(slope_prev ~ pers_n + women + academics +
cdu + afd + hospital_beds + gdp + manufact +
airport + age + popdens,
df_ger_slope_prev[-1],
controls = ctrls)
crf_n_varimp_prev <- varimp(crf_n_fit_prev, nperm = 1)
crf_n_varimp_cond_prev <- varimp(crf_n_fit_prev, conditional = T, nperm = 1)
crf_n_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
crf_n_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
Social distancing